# !pip install seaborn
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# ! pip install --upgrade seaborn
import pandas as pd
# Load the white-wine quality data set and take a first look at it.
# The bare expressions below only display something in a notebook/REPL.
wine_data = pd.read_csv('winequality-white.csv')
wine_data.head()
wine_data.columns
wine_data.tail()
len(wine_data)
wine_data.describe()
import seaborn as sns
import matplotlib.pyplot as plt

# Read the same file again and give every column a snake_case name, so the
# columns can be used with attribute access (e.g. ``wine_data.alcohol``).
wine_data = pd.read_csv('winequality-white.csv')
wine_data.columns = [
    'fixed_acidity',
    'volatile_acidity',
    'citric_acid',
    'residual_sugar',
    'chlorides',
    'free_sulfur_dioxide',
    'total_sulfur_dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
]
wine_data.tail(10)
# --- Univariate distribution of the alcohol column ---------------------------
# The explanatory notes below were raw text in the exported notebook and broke
# parsing; they are kept as comments.

# Plain matplotlib histogram, then seaborn's histogram with a KDE overlay
# (KDE: Gaussian kernel density estimate).
plt.hist(wine_data.alcohol)
sns.distplot(wine_data.alcohol, kde=True)
# Author's observation: around 40% have alcohol content = 9.3
# NOTE(review): a KDE axis shows a density, not a percentage -- confirm this reading.

# Same plot on a wider figure.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data.alcohol, kde=True)

# Histogram only (no KDE), with a rug and many narrow bins.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data.alcohol, kde=False, rug=True, bins=200)
# rug: whether to draw a rugplot on the support axis.
# bins: controls the granularity of the bars; more bins let you inspect the
# data in finer detail.

# Rug plot on its own.
f, ax = plt.subplots(figsize=(15, 5))
sns.rugplot(wine_data.alcohol, height=0.75)  # most of the rug distribution is clustered around the centre

# KDE curve plus rug, histogram bars switched off.
f, ax = plt.subplots(figsize=(15, 5))
sns.distplot(wine_data.alcohol, kde=True, rug=True, hist=False)

# Shaded red KDE; color_codes=True enables the single-letter colour 'r'.
f, ax = plt.subplots(figsize=(15, 5))
sns.set(color_codes=True)
sns.kdeplot(wine_data.alcohol, shade=True, color='r')

# Effect of the KDE bandwidth: default first, then four explicit values
# overlaid on the same axes.
f, ax = plt.subplots(figsize=(15, 5))
sns.kdeplot(wine_data.alcohol)
sns.kdeplot(wine_data.alcohol, bw=0.04, label='bw =0.04 ')
sns.kdeplot(wine_data.alcohol, bw=0.2, label='bw =0.2 ')
sns.kdeplot(wine_data.alcohol, bw=2, label='bw =2 ')
sns.kdeplot(wine_data.alcohol, bw=5, label='bw =5 ')
# Joint distribution of free vs. total sulfur dioxide in three renderings.
# Axis limits are (low, high) tuples: a set literal such as {0, 150} has no
# defined element order, so it must not be used for limits.
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              xlim=(0, 150), ylim=(0, 400), alpha=0.2)  # default kind; alpha still works here
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              kind='hex', xlim=(0, 100), ylim=(0, 200))
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              kind='kde', xlim=(0, 80), ylim=(0, 300))
# Bivariate KDE of the two sulfur-dioxide columns, with a rug for each
# variable, all drawn on one explicitly created axes.
# NOTE(review): newer seaborn releases may reject the second positional
# argument to kdeplot -- confirm against the installed version.
f, ax = plt.subplots(figsize=(8, 5))
sns.kdeplot(wine_data.free_sulfur_dioxide, wine_data.total_sulfur_dioxide, ax=ax)
sns.rugplot(wine_data.free_sulfur_dioxide, color='g', height=0.05, ax=ax)
sns.rugplot(wine_data.total_sulfur_dioxide, color='b', height=0.05,
            vertical=True, ax=ax)
ax.set_xlim(-20, 100)
ax.set_ylim(-50, 400)
# --- Pairwise relationships between selected wine columns --------------------
# sns.pairplot(wine_data, height=3)  # every column at once: takes a long time to run

# pairplot shortcut: KDE on the diagonal, then regression fits in every panel.
sns.pairplot(
    wine_data,
    height=3,
    vars=['fixed_acidity', 'chlorides', 'sulphates', 'alcohol', 'quality'],
    diag_kind='kde',
)
sns.pairplot(
    wine_data,
    height=3,
    vars=['chlorides', 'sulphates', 'quality'],
    kind='reg',
)

# PairGrid with one plotting function for every panel.
g = sns.PairGrid(wine_data, height=3, vars=['chlorides', 'sulphates', 'quality'])
g.map(plt.scatter)

# Points coloured by quality; scatter off the diagonal, KDE on it.
g = sns.PairGrid(wine_data, vars=['chlorides', 'sulphates', 'alcohol'], hue='quality')
g.map_offdiag(plt.scatter)
g.map_diag(sns.kdeplot)
plt.legend(loc=2)

# A different plot type for the diagonal, upper and lower triangles.
g = sns.PairGrid(wine_data, vars=['chlorides', 'sulphates', 'alcohol'])
g.map_diag(sns.kdeplot)
g.map_upper(sns.scatterplot)
g.map_lower(sns.regplot)

# Rectangular grid: three predictors against alcohol, matplotlib scatter.
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
)
g.map(plt.scatter)
# g.map_diag(sns.kdeplot)
# g.map_upper(sns.scatterplot)
# g.map_lower(sns.regplot)

# Same grid drawn with seaborn's scatterplot (the author prefers its look).
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
)
g.map(sns.scatterplot)
# sns.pairplot(wine_data, height=3, vars=['fixed_acidity','chlorides','sulphates', 'quality'], hue = 'pH')
# Correlation matrix of the wine columns as an annotated square heatmap,
# drawn on an explicitly created 10x10 axes.
corrmat = wine_data.corr()
f, ax = plt.subplots(figsize=(10, 10))
# sns.heatmap(corrmat, vmin = -0.9, vmax=0.95, square = True, annot= True, fmt='.2f', cmap='summer' )
sns.heatmap(corrmat, annot=True, fmt='0.2f', square=True, ax=ax)
# --- Regression plots ---------------------------------------------------------
# lmplot (figure-level): density vs. residual sugar on a wide figure.
sns.lmplot(x='residual_sugar', y='density', data=wine_data, height=7, aspect=2)

# quality is discrete, so show alcohol against it three ways: raw points,
# horizontally jittered points, and the per-quality mean (x_estimator).
sns.lmplot(x='quality', y='alcohol', data=wine_data)
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_jitter=.2)
import numpy as np
sns.lmplot(x='quality', y='alcohol', data=wine_data, x_estimator=np.mean)
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, row='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol : needs tuning
# sns.lmplot(x='pH',y='fixed_acidity', data= wine_data, col='quality', hue='alcohol') # Fixed_acidity vs pH in each quality category with hue= alcohol

# regplot (axes-level). x and y are passed by keyword, matching the call
# below; recent seaborn releases no longer accept them positionally.
sns.regplot(x=wine_data.alcohol, y=wine_data.density, color='0')
f, ax = plt.subplots(figsize=(12, 5))
sns.regplot(x=wine_data.residual_sugar, y=wine_data.density, ax=ax)  # controlling size and shape

# Regression rendering of the joint plot; limits are (low, high) tuples
# because a set literal has no defined order.
sns.jointplot(x=wine_data.free_sulfur_dioxide, y=wine_data.total_sulfur_dioxide,
              xlim=(0, 150), ylim=(0, 400), kind='reg')
sns.pairplot(wine_data, x_vars=['fixed_acidity', 'citric_acid', 'chlorides'],
             y_vars=['alcohol'], kind='reg', height=8, aspect=1)
# --- Categorical plots --------------------------------------------------------
# Each axes-level call is given the axes created just before it.

# Alcohol per quality: strip plot, then swarm plot coloured by pH.
f, ax = plt.subplots(figsize=(15, 5))
sns.stripplot(x='quality', y='alcohol', data=wine_data, ax=ax)
f, ax = plt.subplots(figsize=(15, 5))
sns.swarmplot(x='quality', y='alcohol', data=wine_data, hue='pH', ax=ax)
sns.dogplot()

# Box plot and several violin-plot variants of the same relationship.
f, ax = plt.subplots(figsize=(15, 4))
sns.boxplot(x='quality', y='alcohol', data=wine_data, ax=ax)
f, ax = plt.subplots(figsize=(15, 4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, ax=ax)
f, ax = plt.subplots(figsize=(15, 4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, scale='count', ax=ax)
f, ax = plt.subplots(figsize=(15, 4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, scale='count',
               inner='stick', ax=ax)

# Violin plot with the individual observations swarmed on top.
f, ax = plt.subplots(figsize=(15, 4))
sns.violinplot(x='quality', y='alcohol', data=wine_data, ax=ax)
sns.swarmplot(x='quality', y='alcohol', data=wine_data, ax=ax)

# Bar plot of pH per quality.
f, ax = plt.subplots(figsize=(15, 5))
sns.barplot(x='quality', y='pH', data=wine_data, ax=ax)

# Count of rows per distinct alcohol value; the second version is wider,
# uses a green palette and rotates the tick labels.
f, ax = plt.subplots(figsize=(15, 5))
sns.countplot(x='alcohol', data=wine_data, ax=ax)
f, ax = plt.subplots(figsize=(20, 5))
sns.countplot(x='alcohol', data=wine_data, color='m', palette='Greens_d', ax=ax)
plt.xticks(rotation=90)

# Point plot of pH per quality.
f, ax = plt.subplots(figsize=(20, 5))
sns.pointplot(x='quality', y='pH', data=wine_data, ax=ax)
# f, ax= plt.subplots(figsize=(28,5))
# sns.pointplot(x='quality', y='pH', data=wine_data, hue='alcohol') # hue here helps us to visualise the variation of the

# Horizontal box plot of every column in the data frame.
f, ax = plt.subplots(figsize=(15, 8))
sns.boxplot(data=wine_data, orient='h', ax=ax)
# --- FacetGrid on the Titanic data set ---------------------------------------
titanic = pd.read_csv('titanic.csv')
titanic.head()

# An empty grid faceted by class, then Pclass histograms faceted by survival.
g = sns.FacetGrid(titanic, col='Pclass')
g = sns.FacetGrid(titanic, col='Survived')
g.map(plt.hist, 'Pclass', color='k')

# Fare against age, one panel per survival outcome.
g = sns.FacetGrid(titanic, col='Survived', height=8)
g.map(plt.scatter, 'Age', 'Fare')

# Same scatter, coloured by sex, with a legend.
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)  # alpha is the marker opacity
g.add_legend()

# Bars instead of points.
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(plt.bar, 'Age', 'Fare', alpha=0.99)
g.add_legend()

# Add passenger class as the row facet.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(plt.bar, 'Age', 'Fare', alpha=0.99)
g.add_legend()

# seaborn's barplot on the same row/column layout.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare')
# g = sns.FacetGrid(titanic, col='Survived', row = 'Pclass', height=8, hue='Sex')
# g.map(sns.regplot,'Age','Fare', fit_reg=False) # alpha is the opacity
# g.add_legend()

# Scatter on the row/column layout.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)
g.add_legend()

# How about changing some colors: map each Sex value to an explicit colour.
h = {"male": 'b', "female": 'r'}
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex',
                  palette=h)
g.map(plt.scatter, 'Age', 'Fare', alpha=0.99)
g.add_legend()
# --- FacetGrid layout and styling options ------------------------------------
# Wrap the column facets after four panels.
g = sns.FacetGrid(titanic, col='Survived', height=8, col_wrap=4)
g.map(sns.barplot, 'Age', 'Fare')

# Bars with a hex fill colour and thin red edges, rotated tick labels, and
# extra width/height spacing between the panels.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8, hue='Sex')
g.map(sns.barplot, 'Age', 'Fare', color='#334488', edgecolor='red', lw=.5)
plt.xticks(rotation=90)
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)

# Same without the hue split, plus custom axis labels.
g = sns.FacetGrid(titanic, col='Survived', row='Pclass', height=8)
g.map(sns.barplot, 'Age', 'Fare', color='#334488', edgecolor='red', lw=.5)
plt.xticks(rotation=90)
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')

# Scatter per survival outcome with explicit y ticks every 50 up to 500.
g = sns.FacetGrid(titanic, col='Survived', height=8, hue='Sex')
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
g.set(yticks=list(range(0, 501, 50)))

# Restrict the view to ages 0-16 and fares 100-500: limits given when the
# grid is constructed ...
g = sns.FacetGrid(titanic, col='Survived', height=5, hue='Sex',
                  xlim=(0, 16), ylim=(100, 500))
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')

# ... or applied afterwards through g.set().
g = sns.FacetGrid(titanic, col='Survived', height=5, hue='Sex')
g.map(sns.scatterplot, 'Age', 'Fare')
g.fig.subplots_adjust(wspace=0.3, hspace=0.5)
g.set_axis_labels('Age of Passengers', 'Fare of each Passenger')
g.set(xlim=(0, 16), ylim=(100, 500))
# --- Bike-sharing data set ----------------------------------------------------
# Load the daily data and replace the column labels with descriptive names.
bike = pd.read_csv('bike_sharing_daily.csv')
bike.head()
bike.columns
bike.columns = ['instant', 'date', 'season', 'year', 'month', 'holiday', 'weekday',
                'workingday', 'weathersituation', 'temp', 'atemp', 'humidity', 'windspeed',
                'casual_users', 'registered_users', 'cnt']
# bike.cnt.head(200) bike count per day may go above 8000 per day and some days as low as 100 bikes per day

# Distribution of the daily count: histogram with rug, then the same with a KDE.
f, ax = plt.subplots(figsize=(15, 5))
sns.set(style="whitegrid")
# sns.set_xticklabels=([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000])
sns.distplot(bike.cnt, bins=120, kde=False, rug=True, color='k')
f, ax = plt.subplots(figsize=(15, 5))
sns.set(style="whitegrid")
# sns.set_xticklabels=([0,500,1000,1500,2000,2500,3000,3500,4000,4500,5000,5500,6000,6500,7000,7500,8000,8500,9000])
sns.distplot(bike.cnt, bins=120, kde=True, rug=True, color='k')

# Joint plots of the count against day index, month and season. x and y are
# passed by keyword: recent seaborn releases no longer accept them
# positionally, and the keyword form also works on older releases.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')  # number of bikes hired as a function of days passed
sns.jointplot(x=bike.month, y=bike.cnt, height=8, color='g')
sns.jointplot(x=bike.season, y=bike.cnt)  # counts are higher in season 3 and 4

# Count against three weather variables, coloured by season.
sns.pairplot(bike, height=6, aspect=1.2, x_vars=['temp', 'humidity', 'windspeed'],
             y_vars='cnt', hue='season')
# --- Global vs. temporary styles ----------------------------------------------
# sns.set_style('darkgrid') # applies to all lines ; affects all plots that follow
# f,ax = plt.subplots(figsize=(15,5))
# sns.scatterplot(bike.cnt)

# axes_style() used as a context manager: the style applies only to plots
# created inside the indented block.
# NOTE(review): the export lost the original indentation; the block is assumed
# to cover only the two statements below -- confirm against the notebook.
with sns.axes_style('darkgrid'):
    f, ax = plt.subplots(figsize=(15, 5))
    sns.distplot(bike.cnt)

# x and y are passed by keyword; recent seaborn releases no longer accept
# them positionally.
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
sns.despine(offset=15, trim=True)

# sns.set() with no arguments, then the same plot again.
sns.set()
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='g')
# --- Colour palettes ----------------------------------------------------------
# Show the palette currently in effect.
current_palette = sns.color_palette()
sns.palplot(current_palette)

# HLS palettes (hls -> Hue, Lightness and Saturation).
sns.palplot(sns.color_palette('hls', 8))
sns.palplot(sns.hls_palette(n_colors=15, h=.99, l=0.5, s=.99))

# Interactive ColorBrewer pickers.
# NOTE(review): these look like they need ipywidgets in a notebook (see the
# pip hint below) -- confirm.
sns.choose_colorbrewer_palette()
# !pip install ipywidgets
sns.choose_colorbrewer_palette('diverging')

# Sequential palettes: plain 'Blues' and its '_d' variant.
sns.palplot(sns.color_palette('Blues'))
sns.palplot(sns.color_palette('Blues_d'))
sns.choose_colorbrewer_palette('sequential')

# Cubehelix palettes.
sns.palplot(sns.color_palette('cubehelix', 12))
sns.palplot(sns.cubehelix_palette(12))  # good for printing

# Pick a palette and use it for a PairGrid coloured by pH. The second
# assignment replaces the first, so only the 'sequential' choice is used.
h = sns.choose_colorbrewer_palette('diverging')
h = sns.choose_colorbrewer_palette('sequential')
g = sns.PairGrid(
    wine_data,
    x_vars=['chlorides', 'sulphates', 'fixed_acidity'],
    y_vars=['alcohol'],
    hue='pH',
    palette=h,
    height=8,
)
g.map(sns.scatterplot)
# g.add_legend()
# --- Style dictionaries and plotting contexts ---------------------------------
# In every jointplot below x and y are passed by keyword; recent seaborn
# releases no longer accept them positionally.
sns.axes_style()  # current style

# Override individual style parameters (tick sizes/colours, axes face colour).
sns.set_style('ticks', {'xtick.major.size': 8, 'xtick.color': '.15',
                        'ytick.color': '.85', 'ytick.major.size': 10,
                        'axes.facecolor': 'm'})
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')

sns.set()  # reset everything
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')

# Same plot under each of the four named contexts.
sns.set_context('talk')
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
sns.set_context('paper')
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
sns.set_context('notebook')
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')
sns.set_context('poster')
sns.jointplot(x=bike.instant, y=bike.cnt, height=8, color='k')